package com.chenlb.mmseg4j;

/**
 * Word segmentation by forward maximum matching.
 *
 * @author chenlb 2009-3-16 09:07:36 PM
 */
public class SimpleSeg extends Seg
{
	/**
	 * Creates a segmenter backed by the given dictionary.
	 *
	 * @param dic dictionary handed to the {@code Seg} superclass constructor
	 */
	public SimpleSeg(Dictionary dic)
	{
		super(dic);
	}

	/**
	 * Reads up to three consecutive words from the sentence, starting at its
	 * current offset, and stores them in a freshly created chunk.
	 * <p>
	 * After each word is recorded the sentence offset is moved just past it,
	 * so the sentence is mutated by this call. Fewer than three word slots
	 * are filled when {@code sen.isFinish()} becomes true first.
	 *
	 * @param sen sentence to consume; its offset is advanced
	 * @return a new chunk whose {@code words} slots hold the words found
	 */
	public Chunk seg(Sentence sen)
	{
		final Chunk result = new Chunk();
		final char[] text = sen.getText();

		int slot = 0;
		while (slot < 3 && !sen.isFinish())
		{
			final int start = sen.getOffset();
			final char first = text[start];
			final CharNode node = dic.head(first);

			// The word length used below is extra + 1, so the dictionary result
			// is treated as the number of characters matched beyond the first.
			int extra = dic.maxMatch(node, text, start);

			// The dictionary yielded only a single character and that character
			// is a letter or digit: keep scanning forward for adjacent letters
			// and digits.
			// NOTE(review): skipChars is defined outside this file; its result
			// is used here as the count of additional characters - confirm.
			if (extra == 0 && MMSeg.isLetterOrDigit(first))
			{
				final int run = skipChars(text, start);
				if (run > 0)
				{
					extra = run;
				}
			}

			result.words[slot] = new Word(text, sen.getStartOffset(), start, extra + 1);
			// Continue after the word that was just recorded.
			sen.setOffset(start + extra + 1);
			slot++;
		}

		return result;
	}
}
